#!/usr/bin/perl

# Copyright (C) 2010 Modestas Vainius <modax@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

use strict;
use warnings;

use Debian::PkgKde;
use Getopt::Long;
use File::Copy qw();
use File::Temp qw();
use File::Spec;
use IO::Uncompress::Inflate qw(inflate);
use IO::Uncompress::Gunzip qw(gunzip);

# Load extra modules (from libwww-perl and its dependencies)
INIT {
    eval "use HTTP::Request";
    if ($@) {
	error "in order to use this utility, you have to install libwww-perl package";
    }
    eval "use URI; use URI::QueryParam; use URI::Escape;";
    eval "use HTTP::Response; use LWP::UserAgent; use HTML::LinkExtor; use HTML::Parser;";
}

sub usage {
    usageerr "[ -d destdir ] [ -v version ] [ -a arch ] [ -o ] [ package ] [ distribution ]";
}

sub construct_url {
    my ($path, %params) = @_;

    my $url = URI->new($path);
    foreach my $param (keys %params) {
	my @value = (ref $params{$param} eq "ARRAY") ?
	    @{$params{$param}} : ( $params{$param} );
	$url->query_param_append($param, @value);
    }
    return $url->as_string();
}

sub as_array {
    my $scalar = shift;
    my @ret;
    if (defined $scalar) {
	if (ref($scalar) eq 'ARRAY') {
	    @ret = @$scalar;
	} else {
	    push @ret, $scalar;
	}
    }
    return @ret;
}

sub in_array {
    my $value = shift;
    foreach my $member (@_) {
	return 1 if $member eq $value;
    }
    return 0;
}

sub get_command_output {
    my @lines;
    open(my $cmd, "-|", @_) or syserr("unable to execute command %s", $_[0]);
    while (<$cmd>) {
	chop;
	push @lines, $_;
    }
    close $cmd;
    return @lines;
}

sub get_rfc822_field_value {
    my ($field, $input) = @_;
    foreach my $line (@$input) {
	if ($line =~ /^\Q$field\E:\s*(.*)$/) {
	    return "$1"
	}
    }
}

sub html2text {
    my ($in, $out) = @_;
    my $body;
    my $parser = HTML::Parser->new( api_version => 3,
	start_h => [ sub { if (shift() eq "body") { $body = 1 } }, "tagname" ],
	end_h   => [ sub { if (shift() eq "body") { $body = 0 } }, "tagname" ],
	text_h  => [ sub { if ($body) { print $out shift(); } }, "dtext" ]
    );
    $parser->ignore_elements("head", "a", "img");
    return defined($parser->parse_file($in)) && defined($body);
}

sub fetch_log {
    my ($browser, $uri, %opts) = @_;
    my $ok = 0;
    my @status;
    my ($filename, $file);

    # Construct log filename
    $filename = sprintf("%s_%s_%s_%s.build",
	$uri->query_param("pkg"),
	$uri->query_param("ver"),
	$uri->query_param("arch"),
	$uri->query_param("stamp"));
    $file = File::Spec->catfile($opts{destdir}, $filename)
	if defined $opts{destdir};

    # Check overwrite option
    if (!$opts{overwrite} && -e $file) {
	info "Not overwriting existing build log at $file ...";
    } else {
	# Create a temporary file
	my %tmpfileopts = ( TEMPLATE => $filename . ".XXXXXX" );
	$tmpfileopts{DIR} = $opts{destdir} if exists $opts{destdir};
	my $tmpfile1 = File::Temp->new(%tmpfileopts);
	my $tmpfile2 = File::Temp->new(%tmpfileopts);

	info "Fetching build log to $filename ...";
	my $request = HTTP::Request->new(GET => $uri);
	$request->header("Accept-Encoding" => "gzip, deflate, identity");
	$browser->show_progress(1);
	$tmpfile1->close();
	my $response = $browser->request($request, $tmpfile1->filename);
	if ($response->is_success()) {
	    # Inflate/gunzip contents if needed
	    my $content_encoding = $response->header("Content-Encoding");
	    if (defined $content_encoding && $content_encoding ne "identity") {
		my $uncompress_func;
		if ($content_encoding eq "deflate") {
		    push @status, "decoded:deflate";
		    $uncompress_func = \&inflate;
		} elsif ($content_encoding eq "gzip") {
		    push @status, "decoded:gzip";
		    $uncompress_func = \&gunzip;
		} else {
		    push @status, "unsupported content encoding: $content_encoding";
		}
		if (defined $uncompress_func) {
		    if (&$uncompress_func($tmpfile1->filename => $tmpfile2, BinModeOut => 1)) {
			$tmpfile2->close();
			($tmpfile1, $tmpfile2) = ($tmpfile2, $tmpfile1);
			open($tmpfile2, ">:utf8", $tmpfile2->filename) or
			    syserr "unable to reopen temporary file";
			$ok = 1;
		    }
		}
	    } else {
		# Identity encoding (no compression)
		$ok = 1;
		binmode($tmpfile2, ":utf8");
	    }
	    if ($ok) {
		open($tmpfile1, "<:utf8", $tmpfile1->filename);
		if ($ok = html2text($tmpfile1 => $tmpfile2)) {
		    $tmpfile1->close();
		    $tmpfile1 = $tmpfile2;
		} else {
		    push @status, "html unstripped";
		}
		$tmpfile1->close();
		$tmpfile2->close();
		File::Copy::move($tmpfile1->filename, $file) or
		    error "unable to rename '%s' to '%s'", $tmpfile1->filename, $file;
	    }
	}
    }
    return ($ok, [ $filename, @status ]);
}

sub download_logs {
    my ($pkg, %opts) = @_;
    my $distro = $opts{distro};
    my @arches = as_array($opts{arch});
    my @vers = as_array($opts{ver});
    my $url;

    # Construct index URL
    if (defined $distro) {
	$url = construct_url('https://buildd.debian.org/status/package.php',
	    p => $pkg, suite => $distro);
    } elsif (defined $opts{ver}) {
	if (scalar(@arches) == 1 && scalar(@vers) == 1) {
	    $url = construct_url('https://buildd.debian.org/build.php',
		pkg => $pkg, ver => \@vers, arch => \@arches);
	} else {
	    # Only single ver + single arch query is supported by build.php.
	    # For anything else fallback to url filtering (see below)
	    $url = construct_url('https://buildd.debian.org/build.php', pkg => $pkg);
	}
    } else {
	error "neither version(s) nor distribution was specified";
    }

    # Download index document and extract links
    info "Downloading build log index from $url ...";
    my $browser = LWP::UserAgent->new(
	agent => get_program_name(),
	timeout => 10,
	keep_alive => 1,
	env_proxy => 1,
    );
    my $request = HTTP::Request->new(GET => $url);
    if (my $response = $browser->request($request)) {
	error "unable to access log index at URL $url: ".$response->status_line
	    unless $response->is_success();
	my $urlbase = $url;
	$urlbase =~ s,[^/]*$,,;
	my $linkextor = HTML::LinkExtor->new(undef, $urlbase);
	$linkextor->parse($response->content());
	if (my @links = grep { $_->[0] eq "a" } $linkextor->links()) {
	    @links = map { shift @{$_}; +{ @{$_} }->{href} } @links;
	    my @ok;
	    my @failed;
	    my %latest_logs;

	    # Collect links we need to fetch
	    foreach my $link (@links) {
		# Check if it is the link we need
		if ($link =~ m,/fetch\.(cgi|php)(\?[^/]+)$,) {
		    # We might need to filter out links based on the arch and
		    # version we need
		    my $loguri = URI->new($link);
		    my $ver = $loguri->query_param("ver");
		    my $arch = $loguri->query_param("arch");
		    my $stamp = $loguri->query_param("stamp");
		    next if @vers && !in_array($ver, @vers);
		    next if @arches && !in_array($arch, @arches);

		    # Ensure that we are fetching the latest log for ver/arch
		    my $k = $ver . "_" . $arch;
		    if (!exists $latest_logs{$k} || $latest_logs{$k}{stamp} < $stamp) {
			$latest_logs{$k} = { stamp => $stamp, uri => $loguri };
		    }
		}
	    }

	    # Fetch logs
	    foreach my $k (sort keys %latest_logs) {
		my $loguri = $latest_logs{$k}{uri};
		my ($ok, $status) = fetch_log($browser, $loguri, %opts);
		if ($ok) {
		    push @ok, $status;
		} else {
		    push @failed, $status;
		}
	    }
	    return (@ok || @failed) ?  (\@ok, \@failed) : ();
	}
	return ();
    } else {
	error "unable to access log index URL $url";
    }
}

sub print_summary {
    my $logs = shift;
    my $is_warning = shift;
    my $msg = shift;
    if (@$logs) {
	info $msg, @_ unless $is_warning;
	warning $msg, @_ if $is_warning;
	foreach my $log_info (@$logs) {
	    my ($filename, @info) = @$log_info;
	    if (@info) {
		printmsg "  - %s [%s]", $filename, join(", ", @info);
	    } else {
		printmsg "  - %s", $filename;
	    }
	}
    }
}

my $opt_destdir;
my @opt_versions;
my @opt_archs;
my $opt_force;

# Get and verify options
unless (GetOptions(
	"destdir|d=s" => \$opt_destdir,
	"version|v=s" => \@opt_versions,
	"arch|a=s" => \@opt_archs,
	"force|f!" => \$opt_force))
{
    usage();
}

my ($opt_package, $opt_distro) = @ARGV;
my @dpkg_parsechangelog;

if (!$opt_package && -f "debian/changelog") {
    @dpkg_parsechangelog = get_command_output("dpkg-parsechangelog");
    $opt_package = get_rfc822_field_value("Source", \@dpkg_parsechangelog);
}

if (!$opt_package) {
    errormsg "source package was not specified and could not be autoguessed";
    usage();
}

if ($opt_distro && @opt_versions) {
    errormsg "version and distribution options are mutually exclusive";
    usage();
}

if (!@opt_versions) {
    if (!$opt_distro && -f "debian/changelog") {
	@dpkg_parsechangelog = get_command_output("dpkg-parsechangelog") unless @dpkg_parsechangelog;
	$opt_distro = get_rfc822_field_value("Distribution", \@dpkg_parsechangelog);
	if ($opt_distro eq "UNRELEASED") {
	    # Get distro from the next to current entry
	    $opt_distro = get_rfc822_field_value("Distribution",
		[ get_command_output("dpkg-parsechangelog", "-c1", "-o1") ]);
	}
    }
    if (!$opt_distro) {
	errormsg "neither distribution nor version(s) was specified and could not be autoguessed";
	usage();
    }
}

# Determine destination directory to store logs
unless ($opt_destdir) {
    $opt_destdir = sprintf("%s_%s_logs", $opt_package,
	($opt_distro) ? $opt_distro : $opt_versions[0]);
}

info("Selected output directory for logs: %s/", $opt_destdir);
unless (-d $opt_destdir) {
    mkdir $opt_destdir;
}

my ($ok_logs, $failed_logs) =
    download_logs($opt_package, overwrite => $opt_force, destdir => $opt_destdir,
	distro => $opt_distro, ver => \@opt_versions, arch => \@opt_archs);

if (defined $ok_logs) {
    print_summary $ok_logs, 0, "Successfully downloaded build logs (stored to %s):", $opt_destdir;
    print_summary $failed_logs, 1, "Failed to fetch/ignored the following build logs:";
} else {
    error "no build logs referenced in the build log index";
}

END {
    rmdir $opt_destdir if $opt_destdir && $opt_destdir ne ".";
}

exit 0

# vim: noexpandtab tabstop=8 shiftwidth=4
